{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "d3ba9310",
   "metadata": {},
   "source": [
    "### 039 数值标准化 - 读取CSV实现数值标准化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "372ab8b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "81ef9758",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>var1</th>\n",
       "      <th>var2</th>\n",
       "      <th>var3</th>\n",
       "      <th>var4</th>\n",
       "      <th>var5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>12.47</td>\n",
       "      <td>18.60</td>\n",
       "      <td>81.09</td>\n",
       "      <td>481.9</td>\n",
       "      <td>0.09965</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>18.94</td>\n",
       "      <td>21.31</td>\n",
       "      <td>123.60</td>\n",
       "      <td>1130.0</td>\n",
       "      <td>0.09009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>15.46</td>\n",
       "      <td>19.48</td>\n",
       "      <td>101.70</td>\n",
       "      <td>748.9</td>\n",
       "      <td>0.10920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>12.40</td>\n",
       "      <td>17.68</td>\n",
       "      <td>81.47</td>\n",
       "      <td>467.8</td>\n",
       "      <td>0.10540</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>11.54</td>\n",
       "      <td>14.44</td>\n",
       "      <td>74.65</td>\n",
       "      <td>402.9</td>\n",
       "      <td>0.09984</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>14.64</td>\n",
       "      <td>15.24</td>\n",
       "      <td>95.77</td>\n",
       "      <td>651.9</td>\n",
       "      <td>0.11320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139</th>\n",
       "      <td>14.45</td>\n",
       "      <td>20.22</td>\n",
       "      <td>94.49</td>\n",
       "      <td>642.7</td>\n",
       "      <td>0.09872</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>140</th>\n",
       "      <td>11.04</td>\n",
       "      <td>16.83</td>\n",
       "      <td>70.92</td>\n",
       "      <td>373.2</td>\n",
       "      <td>0.10770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>141</th>\n",
       "      <td>19.81</td>\n",
       "      <td>22.15</td>\n",
       "      <td>130.00</td>\n",
       "      <td>1260.0</td>\n",
       "      <td>0.09831</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>142</th>\n",
       "      <td>10.26</td>\n",
       "      <td>12.22</td>\n",
       "      <td>65.75</td>\n",
       "      <td>321.6</td>\n",
       "      <td>0.09996</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>143 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      var1   var2    var3    var4     var5\n",
       "0    12.47  18.60   81.09   481.9  0.09965\n",
       "1    18.94  21.31  123.60  1130.0  0.09009\n",
       "2    15.46  19.48  101.70   748.9  0.10920\n",
       "3    12.40  17.68   81.47   467.8  0.10540\n",
       "4    11.54  14.44   74.65   402.9  0.09984\n",
       "..     ...    ...     ...     ...      ...\n",
       "138  14.64  15.24   95.77   651.9  0.11320\n",
       "139  14.45  20.22   94.49   642.7  0.09872\n",
       "140  11.04  16.83   70.92   373.2  0.10770\n",
       "141  19.81  22.15  130.00  1260.0  0.09831\n",
       "142  10.26  12.22   65.75   321.6  0.09996\n",
       "\n",
       "[143 rows x 5 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(\"./p039.csv\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "39c13a96",
   "metadata": {},
   "outputs": [],
   "source": [
    "scaler = StandardScaler()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "4cbd4aa2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "StandardScaler()"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scaler.fit(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "02bbb9db",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_scaled = scaler.transform(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d477b351",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.47853809, -0.21371678, -0.45835473, -0.51137989,  0.06357125],\n",
       "       [ 1.37424404,  0.38950001,  1.29239853,  1.40549685, -0.61597356],\n",
       "       [ 0.37769352, -0.01783827,  0.3904579 ,  0.27832245,  0.74240524]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_scaled[:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9d942849",
   "metadata": {},
   "source": [
    "### 040 数值标准化 - 训练测试集数值标准化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "931fb61c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "6ae80cdf",
   "metadata": {},
   "outputs": [],
   "source": [
    "train = pd.read_csv(\"./p040-train.csv\")\n",
    "test = pd.read_csv(\"./p040-test.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "a49ec301",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>var1</th>\n",
       "      <th>var2</th>\n",
       "      <th>var3</th>\n",
       "      <th>var4</th>\n",
       "      <th>var5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>12.89</td>\n",
       "      <td>13.12</td>\n",
       "      <td>81.89</td>\n",
       "      <td>515.9</td>\n",
       "      <td>0.06955</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>13.40</td>\n",
       "      <td>20.52</td>\n",
       "      <td>88.64</td>\n",
       "      <td>556.7</td>\n",
       "      <td>0.11060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12.96</td>\n",
       "      <td>18.29</td>\n",
       "      <td>84.18</td>\n",
       "      <td>525.2</td>\n",
       "      <td>0.07351</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>17.75</td>\n",
       "      <td>28.03</td>\n",
       "      <td>117.30</td>\n",
       "      <td>981.6</td>\n",
       "      <td>0.09997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20.58</td>\n",
       "      <td>22.14</td>\n",
       "      <td>134.70</td>\n",
       "      <td>1290.0</td>\n",
       "      <td>0.09090</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    var1   var2    var3    var4     var5\n",
       "0  12.89  13.12   81.89   515.9  0.06955\n",
       "1  13.40  20.52   88.64   556.7  0.11060\n",
       "2  12.96  18.29   84.18   525.2  0.07351\n",
       "3  17.75  28.03  117.30   981.6  0.09997\n",
       "4  20.58  22.14  134.70  1290.0  0.09090"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "4b53a6c7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>var1</th>\n",
       "      <th>var2</th>\n",
       "      <th>var3</th>\n",
       "      <th>var4</th>\n",
       "      <th>var5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>12.47</td>\n",
       "      <td>18.60</td>\n",
       "      <td>81.09</td>\n",
       "      <td>481.9</td>\n",
       "      <td>0.09965</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>18.94</td>\n",
       "      <td>21.31</td>\n",
       "      <td>123.60</td>\n",
       "      <td>1130.0</td>\n",
       "      <td>0.09009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>15.46</td>\n",
       "      <td>19.48</td>\n",
       "      <td>101.70</td>\n",
       "      <td>748.9</td>\n",
       "      <td>0.10920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>12.40</td>\n",
       "      <td>17.68</td>\n",
       "      <td>81.47</td>\n",
       "      <td>467.8</td>\n",
       "      <td>0.10540</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>11.54</td>\n",
       "      <td>14.44</td>\n",
       "      <td>74.65</td>\n",
       "      <td>402.9</td>\n",
       "      <td>0.09984</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    var1   var2    var3    var4     var5\n",
       "0  12.47  18.60   81.09   481.9  0.09965\n",
       "1  18.94  21.31  123.60  1130.0  0.09009\n",
       "2  15.46  19.48  101.70   748.9  0.10920\n",
       "3  12.40  17.68   81.47   467.8  0.10540\n",
       "4  11.54  14.44   74.65   402.9  0.09984"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "ddbd3caa",
   "metadata": {},
   "outputs": [],
   "source": [
    "scaler = StandardScaler()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "c642b3f0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "StandardScaler()"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scaler.fit(train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "e40cc326",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_train = scaler.transform(train)\n",
    "df_test = scaler.transform(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "da651f15",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.34913849, -1.43851335, -0.41172595, -0.39047943, -1.86366229],\n",
       "       [-0.20468665,  0.31264011, -0.13367256, -0.27587995,  1.07807258],\n",
       "       [-0.32931176, -0.21507235, -0.31739376, -0.36435749, -1.57987983]])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "d5013cec",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.46809882, -0.14171322, -0.44468042, -0.485979  ,  0.29337107],\n",
       "       [ 1.36445686,  0.49958757,  1.30643802,  1.3344113 , -0.39171992],\n",
       "       [ 0.37878549,  0.06653205,  0.40430925,  0.26397349,  0.97774544]])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_test[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "a3533029",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1.24650551e+01, 1.78572684e+01, 5.89319980e+02, 1.26752058e+05,\n",
       "       1.94723904e-04])"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scaler.var_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "2d8d24fa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1.41226643e+01, 1.91988498e+01, 9.18850235e+01, 6.54919484e+02,\n",
       "       9.55561972e-02])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scaler.mean_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5323173",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
